Setup

start_time <- Sys.time() 

# Import functions
root = "./"
source(file.path(root,"general_functions.R"))
import_parameters(params)
## **** __Utilized Cores__ **** = 4$subsetGenes
## [1] "protein_coding"
## 
## $subsetCells
## [1] "F"
## 
## $resolution
## [1] "0.001"
## 
## $resultsPath
## [1] "Results/subsetGenes-protein_coding__subsetCells-F__Resolution-0.001__perplexity-30__nCores-4"
## 
## $nCores
## [1] "4"
## 
## $perplexity
## [1] "30"
load(file.path(resultsPath, "scRNAseq_results.RData"))
# resultsPath <- "Results/subsetGenes-protein_coding__subsetCells-F__Resolution-0.2__perplexity-40__nCores-4" 

library(Seurat)
library(cowplot)
library(ggplot2)
library(dplyr) 
library(data.table) 
library(readxl) 
library(reshape2)
library(ggrepel)
library(plotly)

library(GeneOverlap) # BiocManager::install("GeneOverlap") 
library(enrichR) #BiocManager::install("enrichR")

library(monocle) # BiocManager::install("monocle")   
library(garnett) # devtools::install_github("cole-trapnell-lab/garnett")
 
#  
if(getwd()=="/Users/schilder/Desktop/PD_scRNAseq"){
  allGenes <- F
  test.use <- "wilcox"
}else{
  allGenes <- T
  # MAST not install on Minerva...
  test.use <- "wilcox"}
allGenes
## [1] TRUE
# session_info() #Doesn't work on Minerva?

Convert back to Seurat

CDS_to_Seurat <- function(cds, export_PC=F, export_UMAP=F, export_tSNE=F){ 
  # sum(colnames(mDAT) != colnames( mDAT@reducedDimS)) 
  ## Manually save reduced dimensions
  if(export_PC==T){
    mDAT$PC1 <- mDAT@normalized_data_projection[,1]
    mDAT$PC2 <- mDAT@normalized_data_projection[,2]
    mDAT$PC3 <- mDAT@normalized_data_projection[,3] 
  }
  if(export_UMAP==T){
    mDAT$UMAP1 <- mDAT@reducedDimA[1,]
    mDAT$UMAP2 <- mDAT@reducedDimA[2,]
    mDAT$UMAP3 <- mDAT@reducedDimA[3,]
  }
  DAT <- exportCDS(mDAT, export_to = "Seurat", export_all = T)
  DAT@scale.data <- DAT@data #Data was already scaled in Monocle
  # DAT <- Seurat::AddMetaData(DAT, pData(mDAT)[c("garnett_cluster","cell_type","cluster_ext_type")])
  return(DAT)
}
DAT <- CDS_to_Seurat(mDAT, export_PC = T, export_UMAP = T)
## Scaling data matrix
head(DAT@meta.data) 
##                  nGene nUMI orig.ident singlet.or.not.binary       HTO
## GTCATTTTCCGCATCT  2035 6729  RAJ_13357                     1 NYUMD0011
## ATAAGAGAGGAGTTGC  1914 6718  RAJ_13357                     1   BID0076
## CATTATCCATGGTCTA  1971 6156  RAJ_13357                     1  MSMD0067
## CTTAGGAGTGGCGAAT  1893 6392  RAJ_13357                     1 NYUMD0011
## CATCAGAAGCACCGCT  1967 6110  RAJ_13357                     1  MSMD0067
## GCGGGTTAGTAGCCGA  1868 5977  RAJ_13357                     1 NYUMD0011
##                         ID percent.mito res.2 res.1 res.0.6 res.0.3
## GTCATTTTCCGCATCT  MSMD0207   0.04191439     0     1       0       0
## ATAAGAGAGGAGTTGC  BIMD0076   0.04362066     0     1       0       0
## CATTATCCATGGTCTA NYUMD0015   0.03086921     0     1       0       0
## CTTAGGAGTGGCGAAT  MSMD0207   0.03613892     0     1       0       0
## CATCAGAAGCACCGCT NYUMD0015   0.04437531    19    10       8       0
## GCGGGTTAGTAGCCGA  MSMD0207   0.03514056     6     2       0       0
##                           barcode      dx     mut Ethnicity Gender Age
## GTCATTTTCCGCATCT GTCATTTTCCGCATCT      PD      PD     White      M  71
## ATAAGAGAGGAGTTGC ATAAGAGAGGAGTTGC      PD     GBA     White      M  47
## CATTATCCATGGTCTA CATTATCCATGGTCTA control control     White      M  51
## CTTAGGAGTGGCGAAT CTTAGGAGTGGCGAAT      PD      PD     White      M  71
## CATCAGAAGCACCGCT CATCAGAAGCACCGCT control control     White      M  51
## GCGGGTTAGTAGCCGA GCGGGTTAGTAGCCGA      PD      PD     White      M  71
##                  Size_Factor louvain_component Cluster garnett_cluster
## GTCATTTTCCGCATCT    5.117674                 1       1              12
## ATAAGAGAGGAGTTGC    5.109379                 1       1              12
## CATTATCCATGGTCTA    4.661479                 1       1               2
## CTTAGGAGTGGCGAAT    4.844786                 1       1               2
## CATCAGAAGCACCGCT    4.610054                 1       1               2
## GCGGGTTAGTAGCCGA    4.508861                 1       1               5
##                  cell_type cluster_ext_type cluster_ext_type_filt      PC1
## GTCATTTTCCGCATCT Monocytes        Monocytes             Monocytes 21.74363
## ATAAGAGAGGAGTTGC Monocytes        Monocytes             Monocytes 20.51278
## CATTATCCATGGTCTA Monocytes        Monocytes             Monocytes 20.73024
## CTTAGGAGTGGCGAAT Monocytes        Monocytes             Monocytes 21.48480
## CATCAGAAGCACCGCT Monocytes        Monocytes             Monocytes 18.94471
## GCGGGTTAGTAGCCGA Monocytes        Monocytes             Monocytes 17.77910
##                          PC2       PC3    UMAP1    UMAP2    UMAP3
## GTCATTTTCCGCATCT  0.72274985 -2.813874 1.365752 1.513548 1.161469
## ATAAGAGAGGAGTTGC  1.02458096 -3.651205 1.354717 1.488010 1.172568
## CATTATCCATGGTCTA -0.03820978 -2.694958 1.351607 1.519749 1.158323
## CTTAGGAGTGGCGAAT -0.42949109 -1.943783 1.369097 1.527969 1.158835
## CATCAGAAGCACCGCT  4.19589928 -4.857566 1.305207 1.470326 1.143383
## GCGGGTTAGTAGCCGA  2.61114660 -7.713996 1.303913 1.455874 1.130939

Biomarker Expression

gene_gene_plot <- function(DAT, markerList, colorby, title="", plot=T, legend=T, filterZeros=T){ 
  markerData <- DAT@data[row.names(DAT@data) %in% markerList,] %>% t() %>%
    as.matrix() %>%  data.table(keep.rownames = T, key = "rn")
  markerData[markerData$markerList[1]==0,] <- NA
  markerData[markerData$markerList[2]==0,] <- NA 
  colnames(markerData)[2:length(markerData)] <- c("Gene1", "Gene2")
  ## Efficiently merge using data.table
  # dt1 <- data.table(markerData, keep.rownames = "cell_type", key = "cell_type") %>% copy()
  dt2 <- data.table(DAT@meta.data, keep.rownames = "Cell", key = "Cell") %>% copy()
  row.names(dt2) <- dt2$Cell
  markerDT <- markerData[dt2]
  if(filterZeros==T){markerDT <- subset(markerDT, Gene1!=0 & Gene2!=0)}
  
  p <- ggplot(data = markerDT, aes(x=Gene1, y=Gene2 )) +
    stat_density_2d(aes(fill = ..level..), 
                    geom = "polygon", colour="purple", bins = 10, size=.1) +
    geom_point(aes(color=as.factor(eval(parse(text=colorby)))), shape=16, stroke=0, size=2, alpha=.5) +
    guides(colour = guide_legend(title=colorby, override.aes = list(alpha = 1))) + 
      scale_color_manual(values = pretty_colors(mDAT, var = colorby)) +
    labs(x=markerList[1], y=markerList[2], title=title) +
    geom_smooth(method="lm")
  if(legend==F){p <- p + theme(legend.position = "none")}
  if(plot==T){print(p)}
  return(p)
} 

Monocyte Markers

p <- gene_gene_plot(DAT, c("CD14", "FCGR3A"), colorby="cluster_ext_type_filt", 
               title="Monocyte Subtype Markers")

HLA Genes vs. LRRK2

d <- DAT@data@Dimnames[[1]] 
HLA_genes  <- d[startsWith(d, "HLA-DR")] 
HLA_1 <- gene_gene_plot(DAT, c("LRRK2", HLA_genes[1]), colorby="cluster_ext_type_filt", 
               title="", plot = F, legend=F)
HLA_2 <- gene_gene_plot(DAT, c("LRRK2", HLA_genes[2]), colorby="cluster_ext_type_filt", 
               title="", plot = F, legend=F)
HLA_3 <- gene_gene_plot(DAT, c("LRRK2", HLA_genes[3]), colorby="cluster_ext_type_filt", 
               title="", plot = F, legend=F)
plot_grid(HLA_1, HLA_2, HLA_3)

tSNE FeaturePlots

Need to get tSNE from Monocle into Seurat somehow.

Monocyte Markers

FeaturePlot(DAT,features.plot =  c("CD14","FCGR3A"),  
            cols.use = c("grey","red","blue","purple"),
            dark.theme = T, nCol = 2, overlay = T, no.legend = F) 

PD Genes

FeaturePlot(DAT,features.plot = c("LRRK2", "GBA"),  
            cols.use = c("grey","purple","cyan","blue"),
            dark.theme = T, nCol = 2, overlay = T, no.legend = F) 

MS4A4A & MS4A6A

FeaturePlot(DAT, features.plot =  c("MS4A4A","MS4A6A"), 
            cols.use = c("grey","red","green","blue"),
            dark.theme = T, nCol = 2, overlay = T, no.legend = F)

DGE: CD16+ vs. CD16- Monocytes

Remove filters to get all DEGs.

clustDAT <- SubsetData(DAT, subset.name = "Cluster", accept.value = c(1,2), do.scale = F)

# subDAT <- SubsetData(DAT, cells.use = 1:500) 
DEGs_monocytes <- runDGE(DAT, meta_var = "Cluster", group1 = 1, group2 = 2, 
       allGenes = allGenes, test.use = test.use)

# DEGs_monocytes <- FindMarkers(DAT, min.pct = 0.25, only.pos = F,
#                                 ident.1 = 1, ident.2 = 2, test.use = "wilcox"
#                                 )

# DEGs_monocytes <- read.csv("Results/MonocyteSubtype.markers.csv", row.names = 1)
createDT(DEGs_monocytes, caption="DEGs between cluster 1 (CD16- monocytes) and cluster 2 (CD16+ monocytes")
## Warning in instance$preRenderHook(instance): It seems your data is too
## big for client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html
write.csv(DEGs_monocytes, file.path(resultsPath, "MonocyteSubtype.markers.csv"), quote = F, row.names = T)

Identify Cluster-specific Biomarkers

identify_unique_markers <-function(DAT, clusterA, clusterB, allGenes=F, test.use="wilcox"){ 
  DAT <- SetIdent(DAT, ident.use = DAT@meta.data$Cluster) 
  if(allGenes==F){
      clustA.markers <- FindMarkers(DAT, ident.1=clusterA, min.pct = 0.25, 
                                    only.pos = F, test.use = test.use)
      clustB.markers <- FindMarkers(DAT, ident.1=clusterB, min.pct = 0.25, 
                                    only.pos = F, test.use = test.use) 
  }else{
      clustA.markers <- FindMarkers(DAT, ident.1=clusterA, 
                                    only.pos = F, test.use = test.use,
                                    logfc.threshold = 0, min.pct = 0, min.cells.group = 1,
                                    min.cells.gene = 1, min.diff.pct = -Inf)
      clustB.markers <- FindMarkers(DAT, ident.1=clusterB,  
                                    only.pos = F, test.use = test.use,
                                    logfc.threshold = 0, min.pct = 0, min.cells.group = 1,
                                    min.cells.gene = 1, min.diff.pct = -Inf)
  } 
  clustA.uniqueMarkers <- clustA.markers[!(row.names(clustA.markers) %in% row.names(clustB.markers)),] %>%
    subset(p_val_adj<=0.05)
  clustB.uniqueMarkers <- clustB.markers[!(row.names(clustB.markers) %in% row.names(clustA.markers)),] %>%
    subset(p_val_adj<=0.05)
  
  difference <- abs( length(row.names(clustA.uniqueMarkers)) - length(row.names(clustB.uniqueMarkers) ) )
  uniqueMarkers <- data.frame(Cluster0_markers=c(row.names(clustA.uniqueMarkers), rep("",difference) ),
                              Cluster1_markers=row.names(clustB.uniqueMarkers))
  write.csv(uniqueMarkers,
            file.path(resultsPath,"unique_cluster_markers.csv"), 
            quote = F, row.names = F)
  createDT(uniqueMarkers, "Unique/Mutually Exclusive Markers of Cluster 1 and Cluster 2") 
  return(uniqueMarkers)
}

uniqueMarkers <- identify_unique_markers(clustDAT, clusterA = 1, clusterB = 2, 
                                         allGenes=F, test.use = test.use)

DGE Across Clusters

Disease

# clustDAT@meta.data$dx %>% unique()
# FDR <- 0.05/dim(DEGs)[1]
# subset(DEGs, p_val<FDR)
DEGs <- runDGE(clustDAT, meta_var = "dx", group1 = "PD", group2 = "control", 
               allGenes = F, test.use = test.use) 

Mutation

# clustDAT@meta.data$dx %>% unique()
DEGs <- runDGE(clustDAT, meta_var = "mut", group1 = "PD", group2 = "GBA", 
               allGenes = F, test.use = test.use)
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).

DGE Within Clusters

Disease

DGE_within_clusters(clustDAT,  meta_var = "dx", group1 = "PD", group2 = "control", 
                    clusterList = c(1,2), allGenes = F, test.use = test.use)
## Error in DGE_within_clusters(clustDAT, meta_var = "dx", group1 = "PD", : unused argument (test.use = test.use)

Mutation

DGE_within_clusters(DAT, meta_var = "mut", group1 = "PD", group2 = "GBA", 
                    clusterList = c(1,2), allGenes = F, test.use = test.use)
## Error in DGE_within_clusters(DAT, meta_var = "mut", group1 = "PD", group2 = "GBA", : unused argument (test.use = test.use)

DEG Enrichment w/ enrichR

enrichr_dbs <- c("KEGG_2018", "Reactome_2016",
                 "GO_Biological_Process_2018", "GO_Molecular_Function_2018", "GO_Cellular_Component_2018", 
                 "Rare_Diseases_AutoRIF_ARCHS4_Predictions", "Human_Gene_Atlas")
# createDT(enrichR::listEnrichrDbs(), "Enrichr Databases")
 
geneList <- data.frame(Gene=row.names(DEGs_monocytes), 
     Weight=scales::rescale(length(DEGs_monocytes$p_val_adj):1))

results <- enrichr(genes = geneList, databases = enrichr_dbs ) 

Uploading data to Enrichr…

## Error in curl::curl_fetch_memory(url, handle = handle): Couldn't connect to server
for (db in enrichr_dbs){
  cat('\n')
  cat("##",db,"\n")  
  # res <- subset(results[[db]], Adjusted.P.value<=0.05)
  createDT_html(results[[db]], paste("Enrichr Results:",db))
  cat('\n')
}  

KEGG_2018

## Error in crosstalk::is.SharedData(data): object 'results' not found

Save Results

save.image(file.path(resultsPath, "scRNAseq_results.RData"))

end_time <- Sys.time()
end_time - start_time
## Time difference of 1.022966 hours